from django.contrib.auth.models import User
from apps.rss_feeds.models import Feed, Story
from apps.reader.models import UserSubscription, UserStory
from apps.analyzer.models import Category, FeatureCategory
import datetime
import re
import math

    def entry_features(self, entry):
        splitter=re.compile('\\W*')
        f={}
  
        # Extract the title words and annotate
        titlewords=[s.lower() for s in splitter.split(entry['title']) 
                              if len(s)>2 and len(s)<20]
        
        for w in titlewords: f['Title:'+w]=1
  
        # Extract the summary words
        summarywords=[s.lower() for s in splitter.split(entry['summary']) 
                                if len(s)>2 and len(s)<20]

        # Count uppercase words
        uc=0
        for i in range(len(summarywords)):
            w=summarywords[i]
            f[w]=1
            if w.isupper(): uc+=1
    
            # Get word pairs in summary as features
            if i<len(summarywords)-1:
                twowords=' '.join(summarywords[i:i+1])
                f[twowords]=1
    
        # Keep creator and publisher whole
        f['Publisher:'+entry['publisher']]=1

        # UPPERCASE is a virtual word flagging too much shouting  
        if float(uc)/len(summarywords)>0.3: f['UPPERCASE']=1
  
        return f
